{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dimensionality reduction with autoencoders\n",
    "- Autoencoders can be used to reduce dimensionality of the dataset\n",
    "- They are powerful compared to methods such as PCA since they can involve nonlinear transformations (i.e., nonlinear activation functions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import datasets\n",
    "from keras.layers import Input, Dense\n",
    "from keras.models import Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data = datasets.load_digits()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_data = data.images\n",
    "y_data = data.target\n",
    "\n",
    "X_data = X_data.reshape(X_data.shape[0], 64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# fit in data instances into interval [0,1]\n",
    "X_data = X_data / 16.\n",
    "X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size = 0.3, random_state = 777)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1257, 64)\n",
      "(540, 64)\n",
      "(1257,)\n",
      "(540,)\n"
     ]
    }
   ],
   "source": [
    "print(X_train.shape)\n",
    "print(X_test.shape)\n",
    "print(y_train.shape)\n",
    "print(y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# define coding dimension. Coding dimension will be the size of reduced data dimension\n",
    "code_dim = 16"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def auto_encoder_model():    \n",
    "    inputs = Input(shape = (X_train.shape[1],), name = 'input')                         # input layer\n",
    "    code = Dense(code_dim, activation = 'relu', name = 'code')(inputs)                  # hidden layer => represents \"codes\"\n",
    "    outputs = Dense(X_train.shape[1], activation = 'softmax', name = 'output')(code)    # output layer\n",
    "\n",
    "    auto_encoder = Model(inputs = inputs, outputs = outputs)\n",
    "\n",
    "    encoder = Model(inputs = inputs, outputs = code)\n",
    "\n",
    "    decoder_input = Input(shape = (code_dim,))\n",
    "    decoder_output = auto_encoder.layers[-1]\n",
    "    decoder = Model(inputs = decoder_input, outputs = decoder_output(decoder_input))\n",
    "\n",
    "    auto_encoder.compile(optimizer='adam', loss='binary_crossentropy')\n",
    "    return encoder, decoder, auto_encoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "encoder, decoder, auto_encoder = auto_encoder_model()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 3.68 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x165cf8d0>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "auto_encoder.fit(X_train, X_train, epochs = 100, batch_size = 50, validation_data = (X_test, X_test), verbose = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# generate reduced data by using \"encoders\"\n",
    "training_data_reduced = encoder.predict(X_train)\n",
    "test_data_reduced = encoder.predict(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Data instances are reduced to dimensionality of 16 (= coding dimension)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 3.80530763  1.40662622  2.70148444  4.4706459   1.58172095  3.037673\n",
      "  2.62502003  0.76960731  2.0610199   1.76251006  3.91513109  3.0303731\n",
      "  3.48311281  1.46974468  3.43927789  3.81914759]\n",
      "[ 2.02643633  2.05360436  1.0553565   3.56827545  3.66347456  3.34595394\n",
      "  3.43908119  2.14790154  1.23506999  2.21335554  1.63356566  1.7646265\n",
      "  2.78458714  1.40197086  1.6926899   2.78371859]\n"
     ]
    }
   ],
   "source": [
    "print(training_data_reduced[0])    # first insance of reduced training data\n",
    "print(test_data_reduced[0])        # first instance of reduced test data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1257, 16)\n",
      "(540, 16)\n"
     ]
    }
   ],
   "source": [
    "print(training_data_reduced.shape)\n",
    "print(test_data_reduced.shape)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
